#!/usr/bin/env bash
#
# Launch a vLLM server exposing the Qwen3-Reranker-8B model for scoring/reranking.
#
# Every setting defaults to the value that used to be hard-coded here and can
# be overridden through the environment without editing this file:
#   RERANKER_MODEL_PATH     local directory holding the model weights
#   RERANKER_SERVED_NAME    model name clients must send in the "model" field
#   RERANKER_HOST           interface to bind (0.0.0.0 = all interfaces)
#   RERANKER_PORT           TCP port to listen on
#   RERANKER_MAX_MODEL_LEN  value passed to --max-model-len
#
# Example:
#   RERANKER_PORT=9000 RERANKER_MODEL_PATH=/data/Qwen3-Reranker-8B/ bash <this script>

model_path="${RERANKER_MODEL_PATH:-/home/corex/models/qwen/Qwen3-Reranker-8B/}"
served_name="${RERANKER_SERVED_NAME:-Qwen3-Reranker-8B}"
bind_host="${RERANKER_HOST:-0.0.0.0}"
bind_port="${RERANKER_PORT:-8000}"
max_model_len="${RERANKER_MAX_MODEL_LEN:-8192}"

# JSON handed verbatim to --hf_overrides. It selects the
# Qwen3ForSequenceClassification architecture and the "no"/"yes" classifier
# tokens for the original Qwen3 reranker checkpoint (see the vLLM docs for the
# exact semantics of each key). Single-quoted so the shell leaves it untouched.
hf_overrides='{"architectures": ["Qwen3ForSequenceClassification"],"classifier_from_token": ["no", "yes"],"is_original_qwen3_reranker": true}'

# Build the argument list as an array so values containing spaces stay intact.
serve_args=(
	"${model_path}"
	--served-model-name "${served_name}"
	--host "${bind_host}"
	--port "${bind_port}"
	--max-model-len "${max_model_len}"
	--task score
	--hf_overrides "${hf_overrides}"
)

vllm serve "${serve_args[@]}"


# Test the rerank endpoint (the "model" field must match --served-model-name)
# curl "http://localhost:8000/v1/rerank" -H "Content-Type: application/json" -d '{"model": "Qwen3-Reranker-8B", "query": "What is the price of coca cola?", "documents": ["The price of coca cola is 400 dollars.", "Coca cola is my favorite drink", "Sprite is my favorite drink", "Qwen3 is powerful"]}'